Latent Profile Analysis

Author

Sky Taylor

library(tidyverse)
library(tidyLPA)
library(dplyr)
library(kableExtra)

Research Question: What combinations of self-efficacy levels, persistence levels, statistics anxiety, and statistics grades exist?

Data Prep

# Read the SMARVUS dissertation data and keep only the columns used below:
# demographics, the attention/eligibility items, the Q7.1, Q15.1 and Q16.1
# questionnaire items, the statistics-module fields and the grade fields.
smarvus_data <- readr::read_csv(
  here::here("docs/SMARVUS_projects/SMARVUS_dissertations.csv")
) |>
  dplyr::select(
    country, university, eligibility_stats, degree_major, degree_year,
    age, gender, attention_amnesty,
    Q7.1_1:Q7.1_24,
    Q15.1_1:Q15.1_9,
    Q16.1_1:Q16.1_7,
    stats_edu_first_module, stats_edu_module_1,
    grading_scale, grade
  )

## Attention checks
# Each count treats a missing response as a failure.

# How many people failed the attention amnesty item ("No" or missing).
attention_amnesty_exclusion <- with(
  smarvus_data,
  sum(attention_amnesty == "No" | is.na(attention_amnesty))
)
# How many people failed the statistics anxiety attention check
# (Q7.1_24 not equal to "1", or missing).
anxiety_attention_exclusion <- with(
  smarvus_data,
  sum(Q7.1_24 != "1" | is.na(Q7.1_24))
)
# How many people failed the self-efficacy attention check
# (Q15.1_9 not equal to "4", or missing).
se_attention_exclusion <- with(
  smarvus_data,
  sum(Q15.1_9 != "4" | is.na(Q15.1_9))
)

## Eligibility exclusions
# How many people failed the stats eligibility question ("No" or missing).
non_stats_exclusion <- with(
  smarvus_data,
  sum(eligibility_stats == "No" | is.na(eligibility_stats))
)

# Apply the exclusions: keep only rows that pass the attention amnesty item,
# both attention checks and the statistics eligibility question. Rows with a
# missing value on any of these are dropped as well.
smarvus_data_exclusions <- smarvus_data |>
  dplyr::filter(
    attention_amnesty == "Yes" &
      Q7.1_24 == "1" &
      Q15.1_9 == "4" &
      eligibility_stats == "Yes"
  )

# Restrict the sample to usable grade data, in three steps:
#   1. drop rows without a grade;
#   2. keep the universities whose grades are on the 0-100 scale;
#   3. turn the grade text into numerical values.
smarvus_data_exclusions <- smarvus_data_exclusions |>
  filter(!is.na(grade)) |>
  filter(
    university %in% c(
      "Anglia Ruskin University",
      "Bournemouth University",
      "Macquarie University",
      "Queen's University Belfast",
      "Teesside University",
      "University of Bradford",
      "University of Lincoln",
      "University of Southampton",
      "University of Sussex",
      "University of Western Australia"
    )
  ) |>
  mutate(grade = parse_number(grade))

Participant Summaries

# Age: number of participants in each age band and the band's share (%)
# of the analysed sample.
n_age <- smarvus_data_exclusions |>
  dplyr::count(age, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_age
# A tibble: 4 × 3
  age       n percentage
  <chr> <int>      <dbl>
1 18-21   731     85.6  
2 22-25    64      7.49 
3 26+      54      6.32 
4 <NA>      5      0.585
# Gender: number of participants per gender category and the category's
# share (%) of the analysed sample.
n_gender <- smarvus_data_exclusions |>
  dplyr::count(gender, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_gender
# A tibble: 4 × 3
  gender             n percentage
  <chr>          <int>      <dbl>
1 Another Gender    10      1.17 
2 Female/Woman     717     84.0  
3 Male/Man         126     14.8  
4 <NA>               1      0.117
# Country: number of participants per country and the country's share (%)
# of the analysed sample.
n_country <- smarvus_data_exclusions |>
  dplyr::count(country, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_country
# A tibble: 3 × 3
  country              n percentage
  <chr>            <int>      <dbl>
1 Australia           29       3.40
2 England            733      85.8 
3 Northern Ireland    92      10.8 
# Degree major: number of participants per major and the major's share (%)
# of the analysed sample.
n_major <- smarvus_data_exclusions |>
  dplyr::count(degree_major, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_major
# A tibble: 5 × 3
  degree_major        n percentage
  <chr>           <int>      <dbl>
1 Education           1      0.117
2 Psychology        842     98.6  
3 Sciences            2      0.234
4 Social Sciences     6      0.703
5 <NA>                3      0.351
# Degree year: number of participants per year of study and the year's
# share (%) of the analysed sample.
n_year <- smarvus_data_exclusions |>
  dplyr::count(degree_year, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_year
# A tibble: 4 × 3
  degree_year     n percentage
  <chr>       <int>      <dbl>
1 1st Year      530     62.1  
2 2nd Year      303     35.5  
3 3rd Year       19      2.22 
4 4th Year        2      0.234
# University: number of participants per institution and the institution's
# share (%) of the analysed sample.
n_uni <- smarvus_data_exclusions |>
  dplyr::count(university, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_uni
# A tibble: 10 × 3
   university                          n percentage
   <chr>                           <int>      <dbl>
 1 Anglia Ruskin University           16      1.87 
 2 Bournemouth University            176     20.6  
 3 Macquarie University                6      0.703
 4 Queen's University Belfast         92     10.8  
 5 Teesside University                31      3.63 
 6 University of Bradford             30      3.51 
 7 University of Lincoln              63      7.38 
 8 University of Southampton          83      9.72 
 9 University of Sussex              334     39.1  
10 University of Western Australia    23      2.69 
# First statistics module: number of participants per response (including
# missing) and each response's share (%) of the analysed sample.
n_first <- smarvus_data_exclusions |>
  dplyr::count(stats_edu_first_module, name = "n") |>
  dplyr::mutate(percentage = n / sum(n) * 100)
n_first
# A tibble: 3 × 3
  stats_edu_first_module     n percentage
  <chr>                  <int>      <dbl>
1 No                       213       24.9
2 Yes                      244       28.6
3 <NA>                     397       46.5

Composite Scores

# Build the composite scores in one chain.
# Step 1 reverse-scores five Q16.1 items by subtracting each response from 6
# (presumably a 1-5 response scale - confirm against the questionnaire).
# Steps 2-4 take the row mean of each scale's items, ignoring missing
# responses. The attention-check items Q7.1_24 and Q15.1_9 are left out of
# the statistics anxiety and self-efficacy means.
composite_data <- smarvus_data_exclusions %>%
  mutate(
    across(
      c(Q16.1_2, Q16.1_3, Q16.1_4, Q16.1_5, Q16.1_7),
      \(item) 6 - item
    )
  ) %>%
  mutate(
    statistics_anxiety = rowMeans(select(., Q7.1_1:Q7.1_23), na.rm = TRUE)
  ) %>%
  mutate(
    self_efficacy = rowMeans(select(., Q15.1_1:Q15.1_8), na.rm = TRUE)
  ) %>%
  mutate(
    persistence = rowMeans(select(., Q16.1_1:Q16.1_7), na.rm = TRUE)
  )

# Analysis data set: the three composites plus the grade.
data <- composite_data %>%
  select(self_efficacy, statistics_anxiety, persistence, grade)

Latent Profile Analysis

Fit statistics

# Fit 1- to 6-profile solutions on the standardised indicators and collect
# the fit statistics of every solution.
# The seed is fixed because missForest imputation and the bootstrap
# likelihood-ratio test (BLRT) both draw random numbers; without it the
# BLRT p-values change from one render to the next.
set.seed(2024)
lpa_mod <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest") %>%
  tidyLPA::estimate_profiles(1:6) %>%
  tidyLPA::get_fit()
lpa_mod
# A tibble: 6 × 18
  Model Classes LogLik   AIC   AWE   BIC  CAIC   CLC   KIC SABIC     ICL Entropy
  <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>
1     1       1 -4845. 9706. 9820. 9744. 9752. 9692. 9717. 9719.  -9744.   1    
2     1       2 -4747. 9519. 9706. 9581. 9594. 9494. 9535. 9539.  -9871.   0.538
3     1       3 -4691. 9417. 9677. 9503. 9521. 9383. 9438. 9446.  -9804.   0.698
4     1       4 -4679. 9403. 9736. 9512. 9535. 9358. 9429. 9439. -10095.   0.562
5     1       5 -4675. 9406. 9811. 9539. 9567. 9351. 9437. 9450. -10371.   0.493
6     1       6 -4666. 9399. 9876. 9555. 9588. 9334. 9435. 9451. -10447.   0.495
# ℹ 6 more variables: prob_min <dbl>, prob_max <dbl>, n_min <dbl>, n_max <dbl>,
#   BLRT_val <dbl>, BLRT_p <dbl>
# Formatted fit table: one row per number of profiles, showing a subset of
# the fit statistics rounded to four decimal places.
lpa_mod |>
  dplyr::select(Classes, AIC, BIC, SABIC, Entropy, n_min, BLRT_p) |>
  kable(
    col.names = c(
      "Profiles", "AIC", "BIC", "SABIC", "Entropy", "N(Min)", "BLRT(p)"
    ),
    digits = 4
  ) |>
  kableExtra::kable_styling()
Profiles AIC BIC SABIC Entropy N(Min) BLRT(p)
1 9706.186 9744.185 9718.779 1.0000 1.0000 NA
2 9519.031 9580.780 9539.496 0.5378 0.3888 0.0099
3 9417.423 9502.922 9445.759 0.6982 0.0164 0.0099
4 9403.198 9512.447 9439.405 0.5619 0.0152 0.0099
5 9405.822 9538.820 9449.900 0.4931 0.0152 0.2178
6 9398.681 9555.428 9450.630 0.4952 0.0152 0.0396

Estimating profiles

# Plots and estimates for the 2-6 profile solutions.
# Estimating 2 profiles on the standardised indicators.
# The seed is fixed because missForest imputation and the bootstrap
# likelihood-ratio test both draw random numbers, so an unseeded fit is not
# reproducible.
set.seed(2024)
lpa_2 <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest") %>%
  tidyLPA::estimate_profiles(2)
lpa_2
tidyLPA analysis using mclust: 

 Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
 1     2       9519.03 9580.78 0.54    0.80     0.90     0.39  0.61  0.01  
# Profile plot of the 2-profile solution, followed by its parameter estimates.
lpa_2 |> tidyLPA::plot_profiles()

lpa_2 |> tidyLPA::get_estimates()
# A tibble: 16 × 8
   Category  Parameter          Estimate     se        p Class Model Classes
   <chr>     <chr>                 <dbl>  <dbl>    <dbl> <int> <dbl>   <dbl>
 1 Means     self_efficacy        -0.449 0.168  7.41e- 3     1     1       2
 2 Means     statistics_anxiety    0.468 0.0702 2.70e-11     1     1       2
 3 Means     persistence          -0.783 0.0972 8.01e-16     1     1       2
 4 Means     grade                -0.315 0.330  3.39e- 1     1     1       2
 5 Variances self_efficacy         0.858 0.0544 4.88e-56     1     1       2
 6 Variances statistics_anxiety    0.847 0.0441 3.00e-82     1     1       2
 7 Variances persistence           0.573 0.0537 1.41e-26     1     1       2
 8 Variances grade                 0.930 0.0769 1.16e-33     1     1       2
 9 Means     self_efficacy         0.312 0.0632 7.81e- 7     2     1       2
10 Means     statistics_anxiety   -0.325 0.0675 1.46e- 6     2     1       2
11 Means     persistence           0.544 0.0764 1.05e-12     2     1       2
12 Means     grade                 0.219 0.0449 1.03e- 6     2     1       2
13 Variances self_efficacy         0.858 0.0544 4.88e-56     2     1       2
14 Variances statistics_anxiety    0.847 0.0441 3.00e-82     2     1       2
15 Variances persistence           0.573 0.0537 1.41e-26     2     1       2
16 Variances grade                 0.930 0.0769 1.16e-33     2     1       2
# Estimating 3 profiles on the standardised indicators.
# The seed is fixed because missForest imputation and the bootstrap
# likelihood-ratio test both draw random numbers, so an unseeded fit is not
# reproducible.
set.seed(2024)
lpa_3 <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest") %>%
  tidyLPA::estimate_profiles(3)
lpa_3
tidyLPA analysis using mclust: 

 Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
 1     3       9417.42 9502.92 0.70    0.80     0.92     0.02  0.59  0.01  
# Profile plot of the 3-profile solution, followed by its parameter estimates.
lpa_3 |> tidyLPA::plot_profiles()

lpa_3 |> tidyLPA::get_estimates()
# A tibble: 24 × 8
   Category  Parameter          Estimate     se        p Class Model Classes
   <chr>     <chr>                 <dbl>  <dbl>    <dbl> <int> <dbl>   <dbl>
 1 Means     self_efficacy        -0.400 0.0677 3.41e- 9     1     1       3
 2 Means     statistics_anxiety    0.436 0.0717 1.26e- 9     1     1       3
 3 Means     persistence          -0.758 0.0820 2.52e-20     1     1       3
 4 Means     grade                -0.177 0.0539 1.02e- 3     1     1       3
 5 Variances self_efficacy         0.851 0.0502 2.10e-64     1     1       3
 6 Variances statistics_anxiety    0.846 0.0499 1.64e-64     1     1       3
 7 Variances persistence           0.565 0.0460 1.02e-34     1     1       3
 8 Variances grade                 0.743 0.0521 3.97e-46     1     1       3
 9 Means     self_efficacy         0.315 0.0513 7.91e-10     2     1       3
10 Means     statistics_anxiety   -0.330 0.0867 1.41e- 4     2     1       3
# ℹ 14 more rows
# Estimating 4 profiles on the standardised indicators.
# The seed is fixed because missForest imputation and the bootstrap
# likelihood-ratio test both draw random numbers, so an unseeded fit is not
# reproducible.
set.seed(2024)
lpa_4 <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest") %>%
  tidyLPA::estimate_profiles(4)
lpa_4
tidyLPA analysis using mclust: 

 Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
 1     4       9403.20 9512.45 0.56    0.58     0.90     0.02  0.54  0.02  
# Profile plot of the 4-profile solution, followed by its parameter estimates.
lpa_4 |> tidyLPA::plot_profiles()

lpa_4 |> tidyLPA::get_estimates()
# A tibble: 32 × 8
   Category  Parameter          Estimate     se        p Class Model Classes
   <chr>     <chr>                 <dbl>  <dbl>    <dbl> <int> <dbl>   <dbl>
 1 Means     self_efficacy        -0.503 0.195  9.74e- 3     1     1       4
 2 Means     statistics_anxiety    0.455 0.0798 1.21e- 8     1     1       4
 3 Means     persistence          -0.945 0.0937 6.58e-24     1     1       4
 4 Means     grade                -0.204 0.0894 2.26e- 2     1     1       4
 5 Variances self_efficacy         0.827 0.0579 2.52e-46     1     1       4
 6 Variances statistics_anxiety    0.771 0.0688 3.37e-29     1     1       4
 7 Variances persistence           0.534 0.0544 9.50e-23     1     1       4
 8 Variances grade                 0.733 0.0543 1.52e-41     1     1       4
 9 Means     self_efficacy         0.567 0.243  1.97e- 2     2     1       4
10 Means     statistics_anxiety   -0.850 0.178  1.70e- 6     2     1       4
# ℹ 22 more rows
# Estimating 5 profiles on the standardised indicators.
# The seed is fixed because missForest imputation and the bootstrap
# likelihood-ratio test both draw random numbers, so an unseeded fit is not
# reproducible.
set.seed(2024)
lpa_5 <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest") %>%
  tidyLPA::estimate_profiles(5)
lpa_5
tidyLPA analysis using mclust: 

 Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
 1     5       9405.82 9538.82 0.49    0.49     0.89     0.02  0.37  0.19  
# Profile plot of the 5-profile solution, followed by its parameter estimates.
lpa_5 |> tidyLPA::plot_profiles()

lpa_5 |> tidyLPA::get_estimates()
# A tibble: 40 × 8
   Category  Parameter          Estimate     se        p Class Model Classes
   <chr>     <chr>                 <dbl>  <dbl>    <dbl> <int> <dbl>   <dbl>
 1 Means     self_efficacy      -0.569   0.379  1.34e- 1     1     1       5
 2 Means     statistics_anxiety  0.473   0.116  4.32e- 5     1     1       5
 3 Means     persistence        -0.959   0.152  2.61e-10     1     1       5
 4 Means     grade              -0.169   0.131  1.97e- 1     1     1       5
 5 Variances self_efficacy       0.771   0.127  1.34e- 9     1     1       5
 6 Variances statistics_anxiety  0.802   0.0957 5.27e-17     1     1       5
 7 Variances persistence         0.519   0.0878 3.34e- 9     1     1       5
 8 Variances grade               0.693   0.0570 5.72e-34     1     1       5
 9 Means     self_efficacy      -0.00743 0.594  9.90e- 1     2     1       5
10 Means     statistics_anxiety -0.171   0.354  6.29e- 1     2     1       5
# ℹ 30 more rows
# Estimating 6 profiles on the standardised indicators.
# The seed is fixed because missForest imputation and the bootstrap
# likelihood-ratio test both draw random numbers, so an unseeded fit is not
# reproducible.
set.seed(2024)
lpa_6 <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest") %>%
  tidyLPA::estimate_profiles(6)
lpa_6
tidyLPA analysis using mclust: 

 Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
 1     6       9398.68 9555.43 0.50    0.49     0.91     0.02  0.43  0.02  
# Profile plot of the 6-profile solution, followed by its parameter estimates.
lpa_6 |> tidyLPA::plot_profiles()

lpa_6 |> tidyLPA::get_estimates()
# A tibble: 48 × 8
   Category  Parameter          Estimate     se        p Class Model Classes
   <chr>     <chr>                 <dbl>  <dbl>    <dbl> <int> <dbl>   <dbl>
 1 Means     self_efficacy        0.107  0.326  7.42e- 1     1     1       6
 2 Means     statistics_anxiety   0.515  0.163  1.55e- 3     1     1       6
 3 Means     persistence         -0.894  0.216  3.42e- 5     1     1       6
 4 Means     grade               -0.215  0.103  3.72e- 2     1     1       6
 5 Variances self_efficacy        0.571  0.0929 8.01e-10     1     1       6
 6 Variances statistics_anxiety   0.799  0.0887 2.02e-19     1     1       6
 7 Variances persistence          0.551  0.0744 1.26e-13     1     1       6
 8 Variances grade                0.696  0.0648 6.31e-27     1     1       6
 9 Means     self_efficacy       -0.0853 0.797  9.15e- 1     2     1       6
10 Means     statistics_anxiety  -0.206  0.433  6.33e- 1     2     1       6
# ℹ 38 more rows

Different Covariance Structures

# Compare four variance/covariance specifications for the 3-profile solution.
# The standardised, imputed input is prepared once, under a fixed seed, so
# that every model is fitted to exactly the same data and the comparison is
# reproducible (missForest imputation and the bootstrap likelihood-ratio test
# both draw random numbers).
set.seed(2024)
cov_lpa_input <- data %>%
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) %>%
  scale() %>%
  tidyLPA::single_imputation(method = "missForest")

# Model 1: variances equal, covariances zero (spelled out for consistency
# with the other models; these are the values named in this model's label).
mod1 <- cov_lpa_input %>%
  tidyLPA::estimate_profiles(3, variances = "equal", covariances = "zero")
tidyLPA::plot_profiles(mod1)

# Model 2: variances varying, covariances zero.
mod2 <- cov_lpa_input %>%
  tidyLPA::estimate_profiles(3, variances = "varying", covariances = "zero")
tidyLPA::plot_profiles(mod2)

# Model 3: equal variances, equal covariances.
mod3 <- cov_lpa_input %>%
  tidyLPA::estimate_profiles(3, variances = "equal", covariances = "equal")
tidyLPA::plot_profiles(mod3)

# Model 6: varying variances, varying covariances.
mod6 <- cov_lpa_input %>%
  tidyLPA::estimate_profiles(3, variances = "varying", covariances = "varying")
tidyLPA::plot_profiles(mod6)

# Comparing models: fit all four specifications in one call so their fit
# statistics can be tabulated side by side.
mod_table <- cov_lpa_input %>%
  tidyLPA::estimate_profiles(3, models = c(1, 2, 3, 6))
mod_table
tidyLPA analysis using mclust: 

 Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
 1     3       9417.42 9502.92 0.70    0.80     0.92     0.02  0.59  0.01  
 2     3       9363.15 9486.65 0.57    0.67     0.87     0.20  0.55  0.01  
 3     3       9385.31 9499.31 0.60    0.65     0.88     0.04  0.67  0.01  
 6     3       9304.35 9513.35 0.50    0.45     0.86     0.10  0.68  0.01  
# Full set of fit statistics for the four compared models.
mod_table |> tidyLPA::get_fit()
# A tibble: 4 × 18
  Model Classes LogLik   AIC   AWE   BIC  CAIC   CLC   KIC SABIC     ICL Entropy
  <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>
1     1       3 -4691. 9417. 9677. 9503. 9521. 9383. 9438. 9446.  -9804.   0.698
2     2       3 -4656. 9363. 9739. 9487. 9513. 9312. 9392. 9404.  -9920.   0.566
3     3       3 -4669. 9385. 9732. 9499. 9523. 9339. 9412. 9423.  -9912.   0.602
4     6       3 -4608. 9304. 9941. 9513. 9557. 9217. 9351. 9374. -10042.   0.502
# ℹ 6 more variables: prob_min <dbl>, prob_max <dbl>, n_min <dbl>, n_max <dbl>,
#   BLRT_val <dbl>, BLRT_p <dbl>
# Profile plots for all compared models, drawn with ci = NULL.
mod_table |> tidyLPA::plot_profiles(ci = NULL)

# Formatted comparison table: one row per model, showing a subset of the fit
# statistics rounded to four decimal places.
mod_table |>
  tidyLPA::get_fit() |>
  dplyr::select(Model, AIC, BIC, SABIC, Entropy, n_min, BLRT_p) |>
  kable(
    col.names = c(
      "Model", "AIC", "BIC", "SABIC", "Entropy", "N(Min)", "BLRT(p)"
    ),
    digits = 4
  ) |>
  kableExtra::kable_styling()
Model AIC BIC SABIC Entropy N(Min) BLRT(p)
1 9417.423 9502.922 9445.759 0.6982 0.0164 0.0099
2 9363.149 9486.647 9404.079 0.5663 0.1956 0.0099
3 9385.311 9499.309 9423.092 0.6021 0.0363 0.0099
6 9304.348 9513.345 9373.614 0.5023 0.0995 0.0099
# Retained model: parameter estimates of mod2 (varying variances, zero
# covariances, 3 profiles).
mod2 |> tidyLPA::get_estimates()
# A tibble: 24 × 8
   Category  Parameter          Estimate     se        p Class Model Classes
   <chr>     <chr>                 <dbl>  <dbl>    <dbl> <int> <dbl>   <dbl>
 1 Means     self_efficacy       -0.0614 0.124  6.20e- 1     1     2       3
 2 Means     statistics_anxiety   0.305  0.102  2.64e- 3     1     2       3
 3 Means     persistence         -0.447  0.119  1.82e- 4     1     2       3
 4 Means     grade               -0.0527 0.0295 7.39e- 2     1     2       3
 5 Variances self_efficacy        0.661  0.151  1.26e- 5     1     2       3
 6 Variances statistics_anxiety   0.651  0.0998 7.09e-11     1     2       3
 7 Variances persistence          0.918  0.144  2.08e-10     1     2       3
 8 Variances grade                0.0409 0.0124 9.60e- 4     1     2       3
 9 Means     self_efficacy        0.352  0.0689 3.22e- 7     2     2       3
10 Means     statistics_anxiety  -0.378  0.0685 3.40e- 8     2     2       3
# ℹ 14 more rows
# Calculating the size of each profile, step 1: pull the data stored with the
# retained model, which carries the assigned Class and the CPROB columns.
profile <- mod2 |> tidyLPA::get_data()
profile
# A tibble: 854 × 10
   model_number classes_number self_efficacy statistics_anxiety persistence
          <dbl>          <dbl>         <dbl>              <dbl>       <dbl>
 1            2              3       -0.622            1.04          0.0239
 2            2              3       -0.433           -0.000907     -1.06  
 3            2              3       -0.243           -0.111         1.11  
 4            2              3       -1.38            -0.165        -1.93  
 5            2              3       -2.14             2.36         -1.06  
 6            2              3        0.892            1.97         -0.845 
 7            2              3        2.41            -1.92          0.893 
 8            2              3        1.65             0.273        -1.71  
 9            2              3        0.135            1.70         -0.845 
10            2              3       -0.0541          -0.220         0.459 
# ℹ 844 more rows
# ℹ 5 more variables: grade <dbl>, CPROB1 <dbl>, CPROB2 <dbl>, CPROB3 <dbl>,
#   Class <dbl>
# Attach each participant's assigned profile to the unstandardised analysis
# data. Rows are matched by position, so this relies on `profile` having the
# same row order as `data`.
profile_data <- data |>
  dplyr::mutate(mod2_profile = profile$Class)

profile_data
# A tibble: 854 × 5
   self_efficacy statistics_anxiety persistence grade mod2_profile
           <dbl>              <dbl>       <dbl> <dbl>        <dbl>
 1          3                  3.91        3.43    66            1
 2          3.12               3.09        2.71    84            3
 3          3.25               3           4.14    70            2
 4          2.5                2.96        2.14    88            3
 5          2                  4.96        2.71    15            3
 6          4                  4.65        2.86    82            3
 7          5                  1.57        4       82            2
 8          4.5                3.30        2.29    67            1
 9          3.5                4.43        2.86    76            3
10          3.38               2.91        3.71    62            2
# ℹ 844 more rows
# Number of participants assigned to each profile. Counting by the column
# itself keeps its real name in the output; the previous `vars =` form is not
# a count() argument and only created a computed column called "vars".
count(profile_data, mod2_profile)
# A tibble: 3 × 2
   vars     n
  <dbl> <int>
1     1   214
2     2   473
3     3   167

Descriptive Statistics

Means and Standard Deviations

# Mean and standard deviation of each indicator (raw, unstandardised units)
# within every profile of the retained model.
descriptives <- profile_data |>
  dplyr::group_by(mod2_profile) |>
  dplyr::summarise(
    se_mean = mean(self_efficacy),
    se_sd   = sd(self_efficacy),
    sa_mean = mean(statistics_anxiety),
    sa_sd   = sd(statistics_anxiety),
    p_mean  = mean(persistence),
    p_sd    = sd(persistence),
    g_mean  = mean(grade),
    g_sd    = sd(grade)
  )

# Render the descriptives as a styled table.
kableExtra::kable_styling(kable(descriptives))
mod2_profile se_mean se_sd sa_mean sa_sd p_mean p_sd g_mean g_sd
1 3.313084 0.5155658 3.408980 0.5991556 3.033378 0.5585674 65.34607 1.970737
2 3.674947 0.5399640 2.764918 0.7498583 3.793718 0.4724677 68.87455 10.421018
3 2.787425 0.6845865 3.590104 0.7006161 2.820359 0.4830452 58.34844 16.905470

Correlations

# Correlation coefficients within each profile.
# Profile 1: Pearson correlation matrix of the four indicators.
profile_data |>
  dplyr::filter(mod2_profile == 1) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  stats::cor(method = "pearson") |>
  kable() |>
  kableExtra::kable_styling()
self_efficacy statistics_anxiety persistence grade
self_efficacy 1.0000000 0.1213468 -0.1549441 -0.0448416
statistics_anxiety 0.1213468 1.0000000 0.1216082 -0.0589335
persistence -0.1549441 0.1216082 1.0000000 0.0390405
grade -0.0448416 -0.0589335 0.0390405 1.0000000
# Profile 2: Pearson correlation matrix of the four indicators.
profile_data |>
  dplyr::filter(mod2_profile == 2) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  stats::cor(method = "pearson") |>
  kable() |>
  kableExtra::kable_styling()
self_efficacy statistics_anxiety persistence grade
self_efficacy 1.0000000 -0.0736568 0.0591574 -0.1250761
statistics_anxiety -0.0736568 1.0000000 -0.0006716 -0.0073337
persistence 0.0591574 -0.0006716 1.0000000 0.0778946
grade -0.1250761 -0.0073337 0.0778946 1.0000000
# Profile 3: Pearson correlation matrix of the four indicators.
profile_data |>
  dplyr::filter(mod2_profile == 3) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  stats::cor(method = "pearson") |>
  kable() |>
  kableExtra::kable_styling()
self_efficacy statistics_anxiety persistence grade
self_efficacy 1.0000000 0.0918541 -0.0491616 -0.1646897
statistics_anxiety 0.0918541 1.0000000 -0.0615996 -0.0445675
persistence -0.0491616 -0.0615996 1.0000000 -0.0016742
grade -0.1646897 -0.0445675 -0.0016742 1.0000000
# Correlation coefficients within the full sample: Pearson correlation matrix
# of the four indicators across all profiles.
profile_data |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  stats::cor(method = "pearson") |>
  kable() |>
  kableExtra::kable_styling()
self_efficacy statistics_anxiety persistence grade
self_efficacy 1.0000000 -0.2195859 0.3028001 0.0787260
statistics_anxiety -0.2195859 1.0000000 -0.2896573 -0.1609535
persistence 0.3028001 -0.2896573 1.0000000 0.2345787
grade 0.0787260 -0.1609535 0.2345787 1.0000000
# Correlation plots: scatterplot matrices of the four indicators, first for
# each profile in turn and then for the full sample.

# Profile 1
profile_data |>
  dplyr::filter(mod2_profile == 1) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  GGally::ggscatmat()

# Profile 2
profile_data |>
  dplyr::filter(mod2_profile == 2) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  GGally::ggscatmat()

# Profile 3
profile_data |>
  dplyr::filter(mod2_profile == 3) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  GGally::ggscatmat()

# Full sample
profile_data |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  GGally::ggscatmat()

# Significance of correlations.
# Profile 1: Pearson correlations with their p-values (rcorr needs a matrix).
profile_data |>
  dplyr::filter(mod2_profile == 1) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  as.matrix() |>
  Hmisc::rcorr(type = "pearson")
                   self_efficacy statistics_anxiety persistence grade
self_efficacy               1.00               0.12       -0.15 -0.04
statistics_anxiety          0.12               1.00        0.12 -0.06
persistence                -0.15               0.12        1.00  0.04
grade                      -0.04              -0.06        0.04  1.00

n= 214 


P
                   self_efficacy statistics_anxiety persistence grade 
self_efficacy                    0.0765             0.0234      0.5141
statistics_anxiety 0.0765                           0.0759      0.3910
persistence        0.0234        0.0759                         0.5700
grade              0.5141        0.3910             0.5700            
# Profile 2: Pearson correlations with their p-values (rcorr needs a matrix).
profile_data |>
  dplyr::filter(mod2_profile == 2) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  as.matrix() |>
  Hmisc::rcorr(type = "pearson")
                   self_efficacy statistics_anxiety persistence grade
self_efficacy               1.00              -0.07        0.06 -0.13
statistics_anxiety         -0.07               1.00        0.00 -0.01
persistence                 0.06               0.00        1.00  0.08
grade                      -0.13              -0.01        0.08  1.00

n= 473 


P
                   self_efficacy statistics_anxiety persistence grade 
self_efficacy                    0.1096             0.1990      0.0065
statistics_anxiety 0.1096                           0.9884      0.8736
persistence        0.1990        0.9884                         0.0906
grade              0.0065        0.8736             0.0906            
# Profile 3: Pearson correlations with their p-values (rcorr needs a matrix).
profile_data |>
  dplyr::filter(mod2_profile == 3) |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  as.matrix() |>
  Hmisc::rcorr(type = "pearson")
                   self_efficacy statistics_anxiety persistence grade
self_efficacy               1.00               0.09       -0.05 -0.16
statistics_anxiety          0.09               1.00       -0.06 -0.04
persistence                -0.05              -0.06        1.00  0.00
grade                      -0.16              -0.04        0.00  1.00

n= 167 


P
                   self_efficacy statistics_anxiety persistence grade 
self_efficacy                    0.2378             0.5281      0.0334
statistics_anxiety 0.2378                           0.4291      0.5674
persistence        0.5281        0.4291                         0.9829
grade              0.0334        0.5674             0.9829            
# Full sample: Pearson correlations with their p-values (rcorr needs a
# matrix).
profile_data |>
  dplyr::select(self_efficacy, statistics_anxiety, persistence, grade) |>
  as.matrix() |>
  Hmisc::rcorr(type = "pearson")
                   self_efficacy statistics_anxiety persistence grade
self_efficacy               1.00              -0.22        0.30  0.08
statistics_anxiety         -0.22               1.00       -0.29 -0.16
persistence                 0.30              -0.29        1.00  0.23
grade                       0.08              -0.16        0.23  1.00

n= 854 


P
                   self_efficacy statistics_anxiety persistence grade 
self_efficacy                    0.0000             0.0000      0.0214
statistics_anxiety 0.0000                           0.0000      0.0000
persistence        0.0000        0.0000                         0.0000
grade              0.0214        0.0000             0.0000            

Profile Plot

# Profile plot: the estimated indicator means of each profile in the retained
# model (mod2), one line per profile.
# Class is converted with factor() so the legend is discrete for however many
# profiles the model contains.
estimates_table <- get_estimates(mod2) %>%
  filter(Category == "Means") %>%
  select("Parameter", "Estimate", "Class") %>%
  mutate(Class = factor(Class))

estimates_table %>%
  ggplot(
    aes(
      x = factor(
        Parameter,
        levels = c(
          "statistics_anxiety", "persistence", "self_efficacy", "grade"
        )
      ),
      y = Estimate,
      group = Class,
      colour = Class
    )
  ) +
  geom_point() +
  geom_line() +
  scale_x_discrete(
    name = "Variable",
    labels = c("Statistics Anxiety", "Persistence", "Self-Efficacy", "Grade")
  ) +
  scale_y_continuous(name = "Mean Value (Centred)", n.breaks = 10) +
  # Zoom with the coordinate system rather than scale limits: scale limits
  # discard any estimate outside [-1, 1] and break the profile's line, whereas
  # coord_cartesian() only changes the visible window.
  coord_cartesian(ylim = c(-1, 1)) +
  labs(colour = "Profile") +
  theme_classic()